#############################################################
# SCRIPT: Analysis of programs of Radio Popular de Loyola
############################################################

################################################################################
##### Packages and directories #####
################################################################################

## Add your own root directory
#setwd("Replication package/Programs")


## Start from an empty global environment
rm(list = ls())


## Packages
library(tidyverse)
library(readxl)
library(patchwork)
library(ggpubr)
library(viridis)
library(wordcloud2)

## Directory of the data
indir <- "Data/"

## Directory where to save figures
outdir <- "Output/Figures/"

# Make sure the output folder exists before any figure is written to it
dir.create(outdir, recursive = TRUE, showWarnings = FALSE)


## Report dates in English language
# Locale names are platform specific: "English" is the Windows name, the
# "en_US" variants are the usual names on Linux/macOS, and "C" is always
# available and also yields English month abbreviations. Sys.setlocale()
# returns "" when a name is rejected, so the first accepted name wins.
for (loc in c("English", "en_US.UTF-8", "en_US", "C")) {
  if (nzchar(suppressWarnings(Sys.setlocale("LC_TIME", loc)))) {
    break
  }
}
rm(loc)


################################################################################
##### Figures F1 and 1a: Linguistic composition of local news #####
################################################################################

## Import the data
# The "Programs" sheet of every workbook is given the same thirteen column
# names; the February 1967 sheet gets one additional trailing "notes" column.
prog_cols <- c("date", "time", "title", "inform", "type", "cult_ent", "relig",
               "mono_ESP", "mono_EUS", "bilingual", "lines_ESP", "lines_EUS",
               "mentions")

p67 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Febrero 1967.xlsx"),
             sheet = "Programs"),
  c(prog_cols, "notes")
)

p69 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Julio 1969.xlsx"),
             sheet = "Programs"),
  prog_cols
)

p74 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Enero 1974.xlsx"),
             sheet = "Programs"),
  prog_cols
)

p75 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Noviembre 1975.xlsx"),
             sheet = "Programs"),
  prog_cols
)

p78 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Marzo 1978.xlsx"),
             sheet = "Programs"),
  prog_cols
)

# The helper vector is no longer needed
rm(prog_cols)


## Create proportion of Euskera in local news lines

# Adds `share_EUS`, the fraction of news lines written in Euskera, to a
# programs data set.
#   df : data frame with the numeric columns `lines_ESP` and `lines_EUS`
# Returns `df` with the extra column lines_EUS / (lines_ESP + lines_EUS);
# rows where both counts are zero get NaN.
create_share <- function(df) {
  df %>%
    mutate(share_EUS = lines_EUS / (lines_ESP + lines_EUS))
}

# Compute the share for each month, overwriting every monthly data frame
# with its augmented version
p67 <- create_share(p67)
p69 <- create_share(p69)
p74 <- create_share(p74)
p75 <- create_share(p75)
p78 <- create_share(p78)


## Plot composition over time by month

# Line chart of the daily share of news lines in Euskera for one month, with
# the average of the plotted rows drawn as a dashed red horizontal line.
#   news        : rows of the news program to plot; needs the columns `date`
#                 and `share_EUS`
#   panel_title : title printed above the panel
#   date_breaks : spacing of the x-axis ticks, passed to scale_x_datetime()
# Returns a ggplot object.
plot_news_share <- function(news, panel_title, date_breaks = "7 days") {
  ggplot(news, aes(x = date, y = share_EUS)) +
    geom_line() +
    geom_hline(yintercept = mean(news$share_EUS, na.rm = TRUE),
               color = "red", linetype = "dashed") +
    xlab("") + ylab("") + ggtitle(panel_title) +
    scale_x_datetime(date_breaks = date_breaks, date_labels = "%b %d") +
    scale_y_continuous(breaks = seq(0, 1, by = 0.25), limits = c(0, 1)) +
    theme_minimal() +
    theme(axis.text.x = element_text(size = 8, angle = 45))
}

g1 <- p67 %>%
  filter(title == "Boletin de informacion local") %>%
  plot_news_share("1967 (Feb.)", date_breaks = "3 days")

# From 1969 we exclude the evening news, which report market prices
g2 <- p69 %>%
  filter(title == "Panorama local" & time %in% c("13", "14")) %>%
  plot_news_share("1969 (Jul.)")

g3 <- p74 %>%
  filter(title == "Panorama local") %>%
  plot_news_share("1974 (Jan.)")

g4 <- p75 %>%
  filter(title == "Panorama local") %>%
  plot_news_share("1975 (Nov.)")

g5 <- p78 %>%
  filter(title == "Gaur non zer") %>%
  plot_news_share("1978 (Mar.)")

# Paste together
# The outer parentheses keep the figure printed while storing it, so that the
# exact object can be handed to ggsave() instead of relying on last_plot().
(fig_f1 <- ggarrange(g1, g2, g3, g4, g5,
                     ncol = 3, nrow = 2,
                     common.legend = TRUE, align = "hv") %>%
    annotate_figure(left = "Share of news lines in Euskera",
                    bottom = "Date"))


# Save and export
ggsave(filename = "figureF1.pdf",
       plot = fig_f1,
       path = outdir,
       width = 16, height = 12, units = "cm")


## Plot the average composition together (first panel of figure 1)
# One average per sampled month, computed on the same news rows that are
# selected for the monthly panels.
lines_means <- data.frame(
  period = c("1967 (Feb.)", "1969 (Jul.)", "1974 (Jan.)", "1975 (Nov.)", "1978 (Mar.)"),
  Euskera = c(
    with(p67, mean(share_EUS[title == "Boletin de informacion local"], na.rm = TRUE)),
    with(p69, mean(share_EUS[title == "Panorama local" & time %in% c("13", "14")], na.rm = TRUE)),
    with(p74, mean(share_EUS[title == "Panorama local"], na.rm = TRUE)),
    with(p75, mean(share_EUS[title == "Panorama local"], na.rm = TRUE)),
    with(p78, mean(share_EUS[title == "Gaur non zer"], na.rm = TRUE))
  )
)

# Bar chart of the averages; the axis labels put the month on a second line
(means <- ggplot(lines_means, aes(x = period, y = Euskera)) +
    geom_bar(stat = "identity", colour = "blue", fill = "white") +
    labs(x = "Year (month)", y = "Avg. share of news lines in Euskera") +
    scale_x_discrete(
      breaks = c("1967 (Feb.)", "1969 (Jul.)", "1974 (Jan.)", "1975 (Nov.)", "1978 (Mar.)"),
      labels = c("1967\n(Feb.)", "1969\n(Jul.)", "1974\n(Jan.)", "1975\n(Nov.)", "1978\n(Mar.)")
    ) +
    theme_bw(base_size = 10))




################################################################################
##### Figure 1b: Time position of Basque-only programs #####
################################################################################

## Dummies for morning, afternoon and evening and collapse at
#  the program level

# Lists the Euskera-only programs of one month with the time slot they air in.
#   df : programs data with the columns `time`, `title` and `mono_EUS`
#   yr : label stored in the `year` column of the result
# Returns the distinct rows of (year, title, morning, afternoon, evening),
# where the three slot columns are 0/1 dummies.
basque_only_slots <- function(df, yr) {
  df %>%
    mutate(
      # Commas in `time` are turned into dots before the numeric conversion
      time2 = round(as.numeric(str_replace_all(time, ",", ".")), 2),
      year = !!yr,
      morning = ifelse(time2 < 13, 1, 0),
      afternoon = ifelse(time2 >= 13 & time2 < 19, 1, 0),
      evening = ifelse(time2 >= 19, 1, 0)
    ) %>%
    # Keep Euskera-only programs whose time could be parsed
    filter(mono_EUS == 1 & !is.na(time2)) %>%
    select(c("year", "title", "morning", "afternoon", "evening")) %>%
    unique()
}

e1 <- basque_only_slots(p67, "1967")
e2 <- basque_only_slots(p69, "1969")
e3 <- basque_only_slots(p74, "1974")
e4 <- basque_only_slots(p75, "1975")
e5 <- basque_only_slots(p78, "1978")

## Compute unique variable for time slot
# The slot dummies are mutually exclusive; the order below mirrors the
# original precedence (evening over afternoon over morning). Fixing the
# factor levels guarantees that all three slots exist even if one of them is
# never observed.
eus_position <- rbind(e1, e2, e3, e4, e5) %>%
  mutate(position = factor(case_when(evening == 1 ~ "3 - Evening",
                                     afternoon == 1 ~ "2 - Afternoon",
                                     morning == 1 ~ "1 - Morning"),
                           levels = c("1 - Morning", "2 - Afternoon", "3 - Evening")))


## Total of Basque-only programs per slot per year
## Add also the 0 categories
# complete() fills in every year/slot combination missing from the counts
# with n = 0, instead of hard-coding which combinations are empty (which
# would duplicate or omit bars if the data differed from that assumption).
eus_position <- eus_position %>%
  count(year, position) %>%
  complete(year = c("1967", "1969", "1974", "1975", "1978"),
           position,
           fill = list(n = 0L))

## Plot
# Dodged bars: one group per sampled month, one bar per time slot
(prog_time <- eus_position %>%
    ggplot(aes(x = year, y = n, fill = position)) +
    geom_bar(stat = "identity", position = position_dodge(), color = "black") +
    labs(x = "Year (Month)", y = "Euskera-only programs") +
    scale_x_discrete(
      labels = c("1967\n(Feb.)", "1969\n(Jul.)", "1974\n(Jan.)", "1975\n(Nov.)", "1978\n(Mar.)")
    ) +
    scale_fill_brewer(
      name = "Time allocation",
      palette = "Blues",
      labels = c("Morning (7am - 1pm)", "Afternoon (1pm - 7pm)", "Evening (7pm-12am)")
    ) +
    theme_bw(base_size = 10))


## Combine news composition and Basque-only programs to generate Figure 1
# `+` on two ggplots is the patchwork composition (side-by-side panels)
(means_and_progs <- means + prog_time)

## Export plot
# Pass the combined figure explicitly rather than relying on last_plot()
ggsave(filename = "figure1.pdf",
       plot = means_and_progs,
       path = outdir,
       width = 16, height = 10, units = "cm")



## Clean
rm(p67, p69, p74, p75, p78, g1, g2, g3, g4, g5, e1, e2, e3, e4, e5)

################################################################################
##### Figure F2: Linguistic composition in music #####
################################################################################

## Import the new data
# The "Music" sheet of every workbook is given the same seven column names;
# the January 1974 and March 1978 sheets get an additional "notes2" column.
music_cols <- c("date", "time", "title", "songs_esp", "songs_eus", "songs_other", "notes")

p67 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Febrero 1967.xlsx"),
             sheet = "Music"),
  music_cols
)

p69 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Julio 1969.xlsx"),
             sheet = "Music"),
  music_cols
)

p74 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Enero 1974.xlsx"),
             sheet = "Music"),
  c(music_cols, "notes2")
)

p75 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Noviembre 1975.xlsx"),
             sheet = "Music"),
  music_cols
)

p78 <- setNames(
  read_excel(paste0(indir, "Radio Popular de Loyola Marzo 1978.xlsx"),
             sheet = "Music"),
  c(music_cols, "notes2")
)

# The helper vector is no longer needed
rm(music_cols)




#### Compute song shares #####

# Computes the daily share of songs by language.
#   df : data frame with a `date` column and the song counts `songs_esp`,
#        `songs_eus` and `songs_other` (one row per program)
# Returns one row per date with the summed counts, their total `songs_tot`
# and the shares `share_esp`, `share_eus`, `share_other`. Missing counts are
# treated as 0; a date without any song gets NaN shares (0 / 0).
#
# Functions are namespace-qualified so that a missing package fails loudly
# (require() only returns FALSE) and nothing has to be attached.
song_share <- function(df) {
  # Missing counts mean "no song of that language"
  df <- dplyr::mutate(
    df,
    dplyr::across(dplyr::starts_with("songs"),
                  function(x) tidyr::replace_na(x, 0))
  )
  df <- dplyr::mutate(df, songs_tot = songs_esp + songs_eus + songs_other)

  # Collapse programs to daily totals
  df <- dplyr::select(df, c("date", dplyr::starts_with("songs")))
  df <- dplyr::summarise(dplyr::group_by(df, date),
                         dplyr::across(dplyr::everything(), sum))
  df <- dplyr::ungroup(df)

  dplyr::mutate(df,
                share_esp = songs_esp / songs_tot,
                share_eus = songs_eus / songs_tot,
                share_other = songs_other / songs_tot)
}

# Reshapes the daily shares to long format.
#   df : data frame with a `date` column and columns starting with "share"
#        (`share_esp`, `share_eus`, `share_other`)
# Returns one row per date and language with the columns `date`, `language`
# ("Spanish", "Euskera" or "Other"; NA for any other share column) and
# `share`.
#
# Functions are namespace-qualified so that a missing package fails loudly
# (require() only returns FALSE) and nothing has to be attached.
reshape_df <- function(df) {
  df <- dplyr::select(df, c("date", dplyr::starts_with("share")))
  df <- tidyr::pivot_longer(df,
                            cols = c(dplyr::starts_with("share")),
                            names_to = "language", values_to = "share")
  dplyr::mutate(
    df,
    language = dplyr::case_when(language == "share_esp" ~ "Spanish",
                                language == "share_eus" ~ "Euskera",
                                language == "share_other" ~ "Other")
  )
}

# Wrapper: daily song shares by language, returned in long format
custom_songs <- function(df) {
  daily_shares <- song_share(df)
  reshape_df(daily_shares)
}

# Compute the shares for each month, overwriting every monthly data frame
# with its long-format daily shares
p67 <- custom_songs(p67)
p69 <- custom_songs(p69)
p74 <- custom_songs(p74)
p75 <- custom_songs(p75)
p78 <- custom_songs(p78)

# Daily share of songs by language for one month, one coloured line per
# language.
#   shares      : long data with the columns `date`, `share` and `language`
#   panel_title : title printed above the panel
#   date_breaks : spacing of the x-axis ticks, passed to scale_x_datetime()
# Returns a ggplot object.
plot_song_shares <- function(shares, panel_title, date_breaks = "7 days") {
  ggplot(shares, aes(x = date, y = share, col = language)) +
    geom_point() + geom_line() +
    scale_colour_viridis(discrete = TRUE, name = "Language",
                         labels = c("Euskera", "Other", "Spanish")) +
    labs(x = "", y = "", title = panel_title) +
    scale_x_datetime(date_breaks = date_breaks, date_labels = "%b %d") +
    scale_y_continuous(breaks = seq(0, 1, 0.25), limits = c(0, 1)) +
    theme_minimal() +
    theme(axis.text.x = element_text(size = 8, angle = 45))
}

# 1967
g1 <- plot_song_shares(p67, "1967 (Feb.)", date_breaks = "3 days")

# 1969
g2 <- plot_song_shares(p69, "1969 (Jul.)")

# 1974
g3 <- plot_song_shares(p74, "1974 (Jan.)")

# 1975
g4 <- plot_song_shares(p75, "1975 (Nov.)")

# 1978
g5 <- plot_song_shares(p78, "1978 (Mar.)")



# Put all together
# The outer parentheses keep the figure printed while storing it, so that the
# exact object can be handed to ggsave() instead of relying on last_plot().
(fig_f2 <- ggarrange(g1, g2, g3, g4, g5,
                     ncol = 3, nrow = 2,
                     common.legend = TRUE, align = "hv", legend = "right") %>%
    annotate_figure(left = "Share of songs",
                    bottom = "Date"))


# Save and export
ggsave(filename = "figureF2.pdf",
       plot = fig_f2,
       path = outdir,
       width = 16, height = 12, units = "cm")

## Clean
# e1-e5 are not listed: they were already removed after Figure 1, and rm()
# warns about objects that do not exist.
rm(p67, p69, p74, p75, p78, g1, g2, g3, g4, g5)


################################################################################
##### Figures F3 and F4: Mentions of political facts after democratic transition #####
################################################################################

## Import the data
# Only five columns of the "Mentions" sheet are kept
d <- read_excel(paste0(indir, "Radio Popular de Loyola Marzo 1978.xlsx"),
                sheet = "Mentions")
d <- select(d, People, `Who is`, Organization, `Stands for`, Topics)


##### Mentions of people #####
## Drop two people that could not be identified
ppl <- d %>%
  select(People, who = `Who is`) %>%
  filter(!is.na(who))

## Clean and make the bar graph
# Strip the word "Member", trim the result, then insert line breaks before
# an opening parenthesis and before "independentist" to shorten the labels
ppl <- ppl %>%
  mutate(who = str_replace_all(who, "Member", ""),
         who = str_trim(who),
         who = str_replace_all(who, c("\\(" = "\n \\(", "independentist" = "\n independentist")))

# From here on `ppl` holds the plot, not the data
ppl <- ggplot(ppl, aes(x = who)) +
  geom_bar(col = "darkgreen", fill = "white") +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 10, 2), limits = c(0, 11)) +
  xlab("Affiliation") + ylab("") + ggtitle("Individuals") +
  theme_minimal()


##### Mentions of parties and groups #####
# Keep the organizations that have a description
orgs <- d %>%
  select(org = Organization, what = `Stands for`) %>%
  filter(!is.na(what))

## Assign categories
# Organizations not listed below keep their original description
orgs <- orgs %>%
  mutate(what = case_when(
    org == "PNV" ~ "PNV",
    org %in% c("HASI", "LAIA", "EIA", "LKI", "UNAI", "Euskadiko Ezkerra",
               "Altsasuko Mahaia", "Izquierda Abertzale") ~ "Rad. Nat.",
    org %in% c("PCE", "Organizacion revolucionaria de trabajadores",
               "PSP", "PSOE", "PTE") ~ "Left",
    org %in% c("ETA-m", "ETA") ~ "ETA",
    TRUE ~ what
  ))

## Make the bar graph
# From here on `orgs` holds the plot, not the data
orgs <- ggplot(orgs, aes(x = what)) +
  geom_bar(col = "darkred", fill = "white") +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 10, 2), limits = c(0, 10)) +
  xlab("Category") + ylab("") + ggtitle("Organizations") +
  theme_minimal()

##### Figure F3: The two plots combined #####
# The outer parentheses keep the figure printed while storing it, so that the
# exact object can be handed to ggsave() instead of relying on last_plot().
(fig_f3 <- ggarrange(ppl, orgs, nrow = 1) %>%
    annotate_figure(bottom = "Mentions"))


# NOTE(review): no height is given, so ggsave() takes it from the current
# graphics device and the saved aspect ratio depends on the session; set
# `height` explicitly once the intended value is known.
ggsave(filename = "figureF3.pdf",
       plot = fig_f3,
       path = outdir,
       width = 12, units = "cm")



##### Figure F4: Topics mentioned #####

## Compute word frequencies
# One row per distinct topic with the number of times it appears
topics <- d %>%
  select(Topics) %>%
  group_by(Topics) %>%
  summarise(count = n())

## Topic representation

# The wordcloud2 package appears to perturb the position of the words
# randomly at every execution, even with a fixed random seed. Therefore the
# figure will not exactly match the one in the paper.

# To see the underlying distribution, run the following line
topics %>%
  arrange(desc(count)) %>%
  print(n = Inf)

## Word cloud
set.seed(1)
cloud <- wordcloud2(
  topics,
  size = 0.5,
  fontFamily = "mono",
  shape = "circle",
  minRotation = 0, maxRotation = 1.5708,
  rotateRatio = 0.3,
  shuffle = FALSE
)

cloud

## The image has been saved manually from here